{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:11: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
      "of pandas will change to not sort by default.\n",
      "\n",
      "To accept the future behavior, pass 'sort=False'.\n",
      "\n",
      "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
      "\n",
      "  # This is added back by InteractiveShellApp.init_path()\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from lightgbm import early_stopping, log_evaluation\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "from sklearn.model_selection import KFold,GridSearchCV,train_test_split\n",
    "from xgboost import XGBRegressor\n",
    "\n",
    "# Load the space-separated train and test files.\n",
    "train = pd.read_csv('../data/used_car_train_20200313.csv', sep=' ')\n",
    "test = pd.read_csv('../data/used_car_testB_20200421.csv', sep=' ')\n",
    "\n",
    "# Stack train and test so feature engineering is applied to both at once.\n",
    "# sort=True is passed explicitly: the two frames do not share identical columns,\n",
    "# and the implicit default changed between pandas versions. Sorting keeps the\n",
    "# column (and therefore feature) order the same on every pandas version.\n",
    "all_data = pd.concat([train, test], ignore_index=True, sort=True)\n",
    "\n",
    "# Model the target in log1p space; predictions are mapped back with expm1 later.\n",
    "all_data['price'] = np.log1p(all_data['price'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cap outlier power values at 600.\n",
    "all_data['power'] = all_data['power'].clip(upper=600)\n",
    "\n",
    "# Split the YYYYMMDD registration / creation dates into integer year, month and day columns.\n",
    "for prefix, date_col in (('reg', 'regDate'), ('creat', 'creatDate')):\n",
    "    digits = all_data[date_col].astype(str)\n",
    "    all_data[prefix + '_year'] = digits.str[:4].astype('int64')\n",
    "    all_data[prefix + '_month'] = digits.str[4:6].astype('int64')\n",
    "    all_data[prefix + '_day'] = digits.str[6:].astype('int64')\n",
    "\n",
    "# Days between registration and listing creation; dates that cannot be parsed\n",
    "# are coerced to NaT, which leaves used_time missing for those rows.\n",
    "created_on = pd.to_datetime(all_data['creatDate'], format='%Y%m%d', errors='coerce')\n",
    "registered_on = pd.to_datetime(all_data['regDate'], format='%Y%m%d', errors='coerce')\n",
    "all_data['used_time'] = (created_on - registered_on).dt.days\n",
    "\n",
    "\n",
    "# Binary flag: 0 where notRepairedDamage is the '-' placeholder, 1 for every other value.\n",
    "# NOTE(review): this collapses all non-placeholder values into 1 - confirm that is intended.\n",
    "all_data['notRepairedDamage'] = all_data['notRepairedDamage'].map(lambda flag: int(flag != '-'))\n",
    "\n",
    "# Bucket the continuous power feature into 10 equal-width bins, then derive\n",
    "# sum / difference crosses for every ordered pair of the selected columns.\n",
    "all_data['power_bucket'] = pd.cut(all_data['power'], bins=10, labels=False)\n",
    "new_cols = ['power_bucket', 'v_0', 'v_4', 'v_8', 'v_12']\n",
    "for left in new_cols:\n",
    "    for right in new_cols:\n",
    "        if left == right:\n",
    "            continue\n",
    "        pair = '{}_{}'.format(left, right)\n",
    "        all_data[pair + '_add'] = all_data[left] + all_data[right]\n",
    "        all_data[pair + '_sub'] = all_data[left] - all_data[right]\n",
    "\n",
    "# Replace missing values in these four columns with 0.\n",
    "for na_col in ('fuelType', 'gearbox', 'bodyType', 'model'):\n",
    "    all_data[na_col] = all_data[na_col].fillna(0)\n",
    "\n",
    "# Separate features and label: rows with a price form the training set,\n",
    "# rows whose price is null are treated as the test set.\n",
    "price_missing = all_data['price'].isnull()\n",
    "train_data = all_data[~price_missing]\n",
    "test_data = all_data[price_missing]\n",
    "\n",
    "# Columns excluded from the feature matrix (includes the target itself).\n",
    "dropped_cols = ['SaleID', 'name', 'regDate', 'creatDate', 'seller', 'offerType',\n",
    "                'power', 'price', 'v_2', 'v_1', 'regionCode']\n",
    "X_train = train_data.drop(dropped_cols, axis=1)\n",
    "X_test = test_data.drop(dropped_cols, axis=1)\n",
    "y_train = train_data['price']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from lightgbm.sklearn import LGBMRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------- 第 1 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.131353\n",
      "[600]\tvalid_0's l1: 0.125356\n",
      "[900]\tvalid_0's l1: 0.122609\n",
      "[1200]\tvalid_0's l1: 0.12098\n",
      "[1500]\tvalid_0's l1: 0.119766\n",
      "[1800]\tvalid_0's l1: 0.118995\n",
      "[2100]\tvalid_0's l1: 0.118401\n",
      "[2400]\tvalid_0's l1: 0.117967\n",
      "[2700]\tvalid_0's l1: 0.117514\n",
      "[3000]\tvalid_0's l1: 0.11722\n",
      "[3300]\tvalid_0's l1: 0.11693\n",
      "[3600]\tvalid_0's l1: 0.1167\n",
      "[3900]\tvalid_0's l1: 0.116608\n",
      "[4200]\tvalid_0's l1: 0.11642\n",
      "[4500]\tvalid_0's l1: 0.116284\n",
      "[4800]\tvalid_0's l1: 0.116197\n",
      "[5100]\tvalid_0's l1: 0.116125\n",
      "[5400]\tvalid_0's l1: 0.115942\n",
      "[5700]\tvalid_0's l1: 0.115889\n",
      "[6000]\tvalid_0's l1: 0.11582\n",
      "[6300]\tvalid_0's l1: 0.115719\n",
      "[6600]\tvalid_0's l1: 0.1157\n",
      "[6900]\tvalid_0's l1: 0.115628\n",
      "[7200]\tvalid_0's l1: 0.115649\n",
      "--------------------- 第 2 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.132667\n",
      "[600]\tvalid_0's l1: 0.126021\n",
      "[900]\tvalid_0's l1: 0.123213\n",
      "[1200]\tvalid_0's l1: 0.121374\n",
      "[1500]\tvalid_0's l1: 0.120312\n",
      "[1800]\tvalid_0's l1: 0.119511\n",
      "[2100]\tvalid_0's l1: 0.118907\n",
      "[2400]\tvalid_0's l1: 0.118506\n",
      "[2700]\tvalid_0's l1: 0.118174\n",
      "[3000]\tvalid_0's l1: 0.1178\n",
      "[3300]\tvalid_0's l1: 0.11756\n",
      "[3600]\tvalid_0's l1: 0.117345\n",
      "[3900]\tvalid_0's l1: 0.117187\n",
      "[4200]\tvalid_0's l1: 0.117045\n",
      "[4500]\tvalid_0's l1: 0.116891\n",
      "[4800]\tvalid_0's l1: 0.116796\n",
      "[5100]\tvalid_0's l1: 0.116648\n",
      "[5400]\tvalid_0's l1: 0.116544\n",
      "[5700]\tvalid_0's l1: 0.116421\n",
      "[6000]\tvalid_0's l1: 0.116331\n",
      "[6300]\tvalid_0's l1: 0.116267\n",
      "[6600]\tvalid_0's l1: 0.116221\n",
      "[6900]\tvalid_0's l1: 0.116138\n",
      "[7200]\tvalid_0's l1: 0.116067\n",
      "[7500]\tvalid_0's l1: 0.115997\n",
      "[7800]\tvalid_0's l1: 0.115942\n",
      "[8100]\tvalid_0's l1: 0.115944\n",
      "[8400]\tvalid_0's l1: 0.115874\n",
      "[8700]\tvalid_0's l1: 0.115815\n",
      "[9000]\tvalid_0's l1: 0.115821\n",
      "--------------------- 第 3 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.134821\n",
      "[600]\tvalid_0's l1: 0.128441\n",
      "[900]\tvalid_0's l1: 0.125421\n",
      "[1200]\tvalid_0's l1: 0.123822\n",
      "[1500]\tvalid_0's l1: 0.122556\n",
      "[1800]\tvalid_0's l1: 0.121806\n",
      "[2100]\tvalid_0's l1: 0.121239\n",
      "[2400]\tvalid_0's l1: 0.12071\n",
      "[2700]\tvalid_0's l1: 0.120308\n",
      "[3000]\tvalid_0's l1: 0.119984\n",
      "[3300]\tvalid_0's l1: 0.11967\n",
      "[3600]\tvalid_0's l1: 0.11948\n",
      "[3900]\tvalid_0's l1: 0.119298\n",
      "[4200]\tvalid_0's l1: 0.119097\n",
      "[4500]\tvalid_0's l1: 0.11898\n",
      "[4800]\tvalid_0's l1: 0.118925\n",
      "[5100]\tvalid_0's l1: 0.118823\n",
      "[5400]\tvalid_0's l1: 0.118761\n",
      "[5700]\tvalid_0's l1: 0.11868\n",
      "[6000]\tvalid_0's l1: 0.11857\n",
      "[6300]\tvalid_0's l1: 0.118466\n",
      "[6600]\tvalid_0's l1: 0.11844\n",
      "[6900]\tvalid_0's l1: 0.118317\n",
      "[7200]\tvalid_0's l1: 0.118251\n",
      "[7500]\tvalid_0's l1: 0.118204\n",
      "[7800]\tvalid_0's l1: 0.11814\n",
      "[8100]\tvalid_0's l1: 0.118068\n",
      "[8400]\tvalid_0's l1: 0.118039\n",
      "[8700]\tvalid_0's l1: 0.118026\n",
      "[9000]\tvalid_0's l1: 0.11803\n",
      "[9300]\tvalid_0's l1: 0.11796\n",
      "[9600]\tvalid_0's l1: 0.117971\n",
      "[9900]\tvalid_0's l1: 0.117933\n",
      "--------------------- 第 4 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.134933\n",
      "[600]\tvalid_0's l1: 0.128705\n",
      "[900]\tvalid_0's l1: 0.125807\n",
      "[1200]\tvalid_0's l1: 0.124153\n",
      "[1500]\tvalid_0's l1: 0.122972\n",
      "[1800]\tvalid_0's l1: 0.122102\n",
      "[2100]\tvalid_0's l1: 0.12143\n",
      "[2400]\tvalid_0's l1: 0.120958\n",
      "[2700]\tvalid_0's l1: 0.120529\n",
      "[3000]\tvalid_0's l1: 0.120196\n",
      "[3300]\tvalid_0's l1: 0.119804\n",
      "[3600]\tvalid_0's l1: 0.119572\n",
      "[3900]\tvalid_0's l1: 0.119376\n",
      "[4200]\tvalid_0's l1: 0.119127\n",
      "[4500]\tvalid_0's l1: 0.118995\n",
      "[4800]\tvalid_0's l1: 0.118875\n",
      "[5100]\tvalid_0's l1: 0.118783\n",
      "[5400]\tvalid_0's l1: 0.118671\n",
      "[5700]\tvalid_0's l1: 0.118543\n",
      "[6000]\tvalid_0's l1: 0.118477\n",
      "[6300]\tvalid_0's l1: 0.11842\n",
      "[6600]\tvalid_0's l1: 0.118341\n",
      "[6900]\tvalid_0's l1: 0.118249\n",
      "[7200]\tvalid_0's l1: 0.118142\n",
      "[7500]\tvalid_0's l1: 0.118123\n",
      "[7800]\tvalid_0's l1: 0.11806\n",
      "[8100]\tvalid_0's l1: 0.117997\n",
      "[8400]\tvalid_0's l1: 0.117945\n",
      "[8700]\tvalid_0's l1: 0.117891\n",
      "[9000]\tvalid_0's l1: 0.117833\n",
      "[9300]\tvalid_0's l1: 0.117793\n",
      "[9600]\tvalid_0's l1: 0.117771\n",
      "[9900]\tvalid_0's l1: 0.117743\n",
      "--------------------- 第 5 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.134545\n",
      "[600]\tvalid_0's l1: 0.128327\n",
      "[900]\tvalid_0's l1: 0.125381\n",
      "[1200]\tvalid_0's l1: 0.123603\n",
      "[1500]\tvalid_0's l1: 0.122254\n",
      "[1800]\tvalid_0's l1: 0.121399\n",
      "[2100]\tvalid_0's l1: 0.120645\n",
      "[2400]\tvalid_0's l1: 0.120056\n",
      "[2700]\tvalid_0's l1: 0.119707\n",
      "[3000]\tvalid_0's l1: 0.119396\n",
      "[3300]\tvalid_0's l1: 0.119123\n",
      "[3600]\tvalid_0's l1: 0.118847\n",
      "[3900]\tvalid_0's l1: 0.11865\n",
      "[4200]\tvalid_0's l1: 0.118471\n",
      "[4500]\tvalid_0's l1: 0.118274\n",
      "[4800]\tvalid_0's l1: 0.118201\n",
      "[5100]\tvalid_0's l1: 0.118053\n",
      "[5400]\tvalid_0's l1: 0.117936\n",
      "[5700]\tvalid_0's l1: 0.117794\n",
      "[6000]\tvalid_0's l1: 0.117687\n",
      "[6300]\tvalid_0's l1: 0.117584\n",
      "[6600]\tvalid_0's l1: 0.117458\n",
      "[6900]\tvalid_0's l1: 0.117358\n",
      "[7200]\tvalid_0's l1: 0.117367\n",
      "--------------------- 第 6 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n",
      "[300]\tvalid_0's l1: 0.130843\n",
      "[600]\tvalid_0's l1: 0.124544\n",
      "[900]\tvalid_0's l1: 0.121763\n",
      "[1200]\tvalid_0's l1: 0.120162\n",
      "[1500]\tvalid_0's l1: 0.118966\n",
      "[1800]\tvalid_0's l1: 0.118171\n",
      "[2100]\tvalid_0's l1: 0.117671\n",
      "[2400]\tvalid_0's l1: 0.117249\n",
      "[2700]\tvalid_0's l1: 0.116855\n",
      "[3000]\tvalid_0's l1: 0.116662\n",
      "[3300]\tvalid_0's l1: 0.116428\n",
      "[3600]\tvalid_0's l1: 0.116207\n",
      "[3900]\tvalid_0's l1: 0.116043\n",
      "[4200]\tvalid_0's l1: 0.115962\n",
      "[4500]\tvalid_0's l1: 0.115824\n",
      "[4800]\tvalid_0's l1: 0.115742\n",
      "[5100]\tvalid_0's l1: 0.115604\n",
      "[5400]\tvalid_0's l1: 0.115459\n",
      "[5700]\tvalid_0's l1: 0.115399\n",
      "[6000]\tvalid_0's l1: 0.115384\n",
      "--------------------- 第 7 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.133536\n",
      "[600]\tvalid_0's l1: 0.12739\n",
      "[900]\tvalid_0's l1: 0.124784\n",
      "[1200]\tvalid_0's l1: 0.123253\n",
      "[1500]\tvalid_0's l1: 0.122211\n",
      "[1800]\tvalid_0's l1: 0.121482\n",
      "[2100]\tvalid_0's l1: 0.120729\n",
      "[2400]\tvalid_0's l1: 0.120235\n",
      "[2700]\tvalid_0's l1: 0.119755\n",
      "[3000]\tvalid_0's l1: 0.119468\n",
      "[3300]\tvalid_0's l1: 0.119168\n",
      "[3600]\tvalid_0's l1: 0.118954\n",
      "[3900]\tvalid_0's l1: 0.118743\n",
      "[4200]\tvalid_0's l1: 0.118554\n",
      "[4500]\tvalid_0's l1: 0.118464\n",
      "[4800]\tvalid_0's l1: 0.118305\n",
      "[5100]\tvalid_0's l1: 0.118175\n",
      "[5400]\tvalid_0's l1: 0.118068\n",
      "[5700]\tvalid_0's l1: 0.117965\n",
      "[6000]\tvalid_0's l1: 0.117826\n",
      "[6300]\tvalid_0's l1: 0.11776\n",
      "[6600]\tvalid_0's l1: 0.117706\n",
      "[6900]\tvalid_0's l1: 0.117651\n",
      "[7200]\tvalid_0's l1: 0.117593\n",
      "[7500]\tvalid_0's l1: 0.117531\n",
      "[7800]\tvalid_0's l1: 0.117451\n",
      "[8100]\tvalid_0's l1: 0.117424\n",
      "[8400]\tvalid_0's l1: 0.117394\n",
      "[8700]\tvalid_0's l1: 0.11737\n",
      "--------------------- 第 8 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.131478\n",
      "[600]\tvalid_0's l1: 0.124884\n",
      "[900]\tvalid_0's l1: 0.121916\n",
      "[1200]\tvalid_0's l1: 0.120214\n",
      "[1500]\tvalid_0's l1: 0.119053\n",
      "[1800]\tvalid_0's l1: 0.118153\n",
      "[2100]\tvalid_0's l1: 0.117644\n",
      "[2400]\tvalid_0's l1: 0.117171\n",
      "[2700]\tvalid_0's l1: 0.116775\n",
      "[3000]\tvalid_0's l1: 0.11635\n",
      "[3300]\tvalid_0's l1: 0.116092\n",
      "[3600]\tvalid_0's l1: 0.115885\n",
      "[3900]\tvalid_0's l1: 0.115717\n",
      "[4200]\tvalid_0's l1: 0.115568\n",
      "[4500]\tvalid_0's l1: 0.11539\n",
      "[4800]\tvalid_0's l1: 0.115291\n",
      "[5100]\tvalid_0's l1: 0.115174\n",
      "[5400]\tvalid_0's l1: 0.115064\n",
      "[5700]\tvalid_0's l1: 0.114961\n",
      "[6000]\tvalid_0's l1: 0.114864\n",
      "[6300]\tvalid_0's l1: 0.114818\n",
      "[6600]\tvalid_0's l1: 0.114794\n",
      "[6900]\tvalid_0's l1: 0.114751\n",
      "[7200]\tvalid_0's l1: 0.114686\n",
      "[7500]\tvalid_0's l1: 0.114641\n",
      "[7800]\tvalid_0's l1: 0.114603\n",
      "[8100]\tvalid_0's l1: 0.114567\n",
      "[8400]\tvalid_0's l1: 0.11454\n",
      "--------------------- 第 9 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.132068\n",
      "[600]\tvalid_0's l1: 0.125627\n",
      "[900]\tvalid_0's l1: 0.123012\n",
      "[1200]\tvalid_0's l1: 0.121332\n",
      "[1500]\tvalid_0's l1: 0.120109\n",
      "[1800]\tvalid_0's l1: 0.119348\n",
      "[2100]\tvalid_0's l1: 0.118676\n",
      "[2400]\tvalid_0's l1: 0.118174\n",
      "[2700]\tvalid_0's l1: 0.117811\n",
      "[3000]\tvalid_0's l1: 0.117631\n",
      "[3300]\tvalid_0's l1: 0.1174\n",
      "[3600]\tvalid_0's l1: 0.117223\n",
      "[3900]\tvalid_0's l1: 0.116992\n",
      "[4200]\tvalid_0's l1: 0.116832\n",
      "[4500]\tvalid_0's l1: 0.116753\n",
      "[4800]\tvalid_0's l1: 0.116587\n",
      "[5100]\tvalid_0's l1: 0.116505\n",
      "[5400]\tvalid_0's l1: 0.116464\n",
      "[5700]\tvalid_0's l1: 0.116334\n",
      "[6000]\tvalid_0's l1: 0.116245\n",
      "[6300]\tvalid_0's l1: 0.116159\n",
      "[6600]\tvalid_0's l1: 0.116116\n",
      "[6900]\tvalid_0's l1: 0.116036\n",
      "[7200]\tvalid_0's l1: 0.115982\n",
      "[7500]\tvalid_0's l1: 0.115972\n",
      "[7800]\tvalid_0's l1: 0.115918\n",
      "[8100]\tvalid_0's l1: 0.115868\n",
      "[8400]\tvalid_0's l1: 0.115847\n",
      "[8700]\tvalid_0's l1: 0.115821\n",
      "[9000]\tvalid_0's l1: 0.115767\n",
      "[9300]\tvalid_0's l1: 0.115733\n",
      "[9600]\tvalid_0's l1: 0.115692\n",
      "[9900]\tvalid_0's l1: 0.115672\n",
      "--------------------- 第 10 折 ---------------------\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n",
      "[300]\tvalid_0's l1: 0.13096\n",
      "[600]\tvalid_0's l1: 0.124698\n",
      "[900]\tvalid_0's l1: 0.121977\n",
      "[1200]\tvalid_0's l1: 0.120064\n",
      "[1500]\tvalid_0's l1: 0.11874\n",
      "[1800]\tvalid_0's l1: 0.117889\n",
      "[2100]\tvalid_0's l1: 0.117207\n",
      "[2400]\tvalid_0's l1: 0.116707\n",
      "[2700]\tvalid_0's l1: 0.116367\n",
      "[3000]\tvalid_0's l1: 0.116002\n",
      "[3300]\tvalid_0's l1: 0.115798\n",
      "[3600]\tvalid_0's l1: 0.115566\n",
      "[3900]\tvalid_0's l1: 0.115468\n",
      "[4200]\tvalid_0's l1: 0.115325\n",
      "[4500]\tvalid_0's l1: 0.115244\n",
      "[4800]\tvalid_0's l1: 0.115168\n",
      "[5100]\tvalid_0's l1: 0.115023\n",
      "[5400]\tvalid_0's l1: 0.114911\n",
      "[5700]\tvalid_0's l1: 0.114796\n",
      "[6000]\tvalid_0's l1: 0.114747\n",
      "[6300]\tvalid_0's l1: 0.114715\n",
      "[6600]\tvalid_0's l1: 0.114685\n",
      "[6900]\tvalid_0's l1: 0.114626\n",
      "[7200]\tvalid_0's l1: 0.114596\n",
      "[7500]\tvalid_0's l1: 0.114562\n",
      "[7800]\tvalid_0's l1: 0.114494\n",
      "[8100]\tvalid_0's l1: 0.114499\n",
      "[8400]\tvalid_0's l1: 0.114448\n",
      "交叉验证 MAE: 490.5802616407327\n"
     ]
    }
   ],
   "source": [
    "# 10-fold cross-validation.\n",
    "# Each fold trains on 9/10 of the data, early-stops on the held-out tenth, and\n",
    "# contributes 1/10 of the averaged test prediction and of the reported MAE.\n",
    "cols = list(X_train)\n",
    "oof = np.zeros(X_train.shape[0])  # out-of-fold predictions, kept in log1p space\n",
    "sub = test[['SaleID']].copy()\n",
    "sub['price'] = 0.0  # float accumulator for the fold-averaged predictions\n",
    "feat_df = pd.DataFrame({'feat': cols, 'imp': 0})\n",
    "skf = KFold(n_splits=10, shuffle=True, random_state=17)\n",
    "\n",
    "# Best configuration found so far.\n",
    "clf = LGBMRegressor(\n",
    "    n_estimators=10000,\n",
    "    learning_rate=0.07,\n",
    "    boosting_type='gbdt',\n",
    "    objective='regression_l1',\n",
    "    max_depth=-1,\n",
    "    num_leaves=31,\n",
    "    min_child_samples=20,\n",
    "    feature_fraction=0.8,\n",
    "    bagging_freq=1,\n",
    "    bagging_fraction=0.8,\n",
    "    lambda_l2=2,\n",
    "    random_state=17,\n",
    "    metric='mae'\n",
    ")\n",
    "\n",
    "mae = 0\n",
    "for i, (trn_idx, val_idx) in enumerate(skf.split(X_train, y_train)):\n",
    "    print('--------------------- 第 {} 折 ---------------------'.format(i + 1))\n",
    "    # KFold yields positional indices, so rows are selected with .iloc on both\n",
    "    # the features and the target; label-based y_train[idx] would only be\n",
    "    # correct while the index happens to be 0..n-1.\n",
    "    trn_x, trn_y = X_train.iloc[trn_idx].reset_index(drop=True), y_train.iloc[trn_idx]\n",
    "    val_x, val_y = X_train.iloc[val_idx].reset_index(drop=True), y_train.iloc[val_idx]\n",
    "    # Early stopping and periodic logging are passed as callbacks; the\n",
    "    # early_stopping_rounds= / verbose= arguments of fit() are deprecated.\n",
    "    clf.fit(\n",
    "        trn_x, trn_y,\n",
    "        eval_set=[(val_x, val_y)],\n",
    "        eval_metric='mae',\n",
    "        callbacks=[early_stopping(stopping_rounds=300), log_evaluation(period=300)]\n",
    "    )\n",
    "\n",
    "    # The model predicts log1p(price); expm1 maps predictions back to prices.\n",
    "    sub['price'] += np.expm1(clf.predict(X_test)) / skf.n_splits\n",
    "    oof[val_idx] = clf.predict(val_x)\n",
    "    mae += mean_absolute_error(np.expm1(val_y), np.expm1(oof[val_idx])) / skf.n_splits\n",
    "\n",
    "print('交叉验证 MAE:', mae)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the submission frame `sub` as a CSV file without the index column.\n",
    "sub.to_csv(path_or_buf='lgb.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 12 candidates, totalling 60 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.\n",
      "[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  7.0min\n",
      "[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 16.8min finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n",
      "最优参数： {'feature_fraction': 0.8, 'learning_rate': 0.01, 'max_depth': 10}\n",
      "--------------------- 第 1 折 ---------------------\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.205491\n",
      "[600]\tvalid_0's l1: 0.154611\n",
      "[900]\tvalid_0's l1: 0.143219\n",
      "[1200]\tvalid_0's l1: 0.137626\n",
      "[1500]\tvalid_0's l1: 0.13387\n",
      "[1800]\tvalid_0's l1: 0.13122\n",
      "[2100]\tvalid_0's l1: 0.129292\n",
      "[2400]\tvalid_0's l1: 0.127862\n",
      "[2700]\tvalid_0's l1: 0.126668\n",
      "[3000]\tvalid_0's l1: 0.125676\n",
      "[3300]\tvalid_0's l1: 0.124846\n",
      "[3600]\tvalid_0's l1: 0.124155\n",
      "[3900]\tvalid_0's l1: 0.123533\n",
      "[4200]\tvalid_0's l1: 0.122983\n",
      "[4500]\tvalid_0's l1: 0.122471\n",
      "[4800]\tvalid_0's l1: 0.121974\n",
      "[5100]\tvalid_0's l1: 0.121568\n",
      "[5400]\tvalid_0's l1: 0.12119\n",
      "[5700]\tvalid_0's l1: 0.120824\n",
      "[6000]\tvalid_0's l1: 0.120506\n",
      "[6300]\tvalid_0's l1: 0.120199\n",
      "[6600]\tvalid_0's l1: 0.11992\n",
      "[6900]\tvalid_0's l1: 0.119652\n",
      "[7200]\tvalid_0's l1: 0.119376\n",
      "[7500]\tvalid_0's l1: 0.11914\n",
      "[7800]\tvalid_0's l1: 0.118929\n",
      "[8100]\tvalid_0's l1: 0.118722\n",
      "[8400]\tvalid_0's l1: 0.118517\n",
      "[8700]\tvalid_0's l1: 0.118351\n",
      "[9000]\tvalid_0's l1: 0.11818\n",
      "[9300]\tvalid_0's l1: 0.118024\n",
      "[9600]\tvalid_0's l1: 0.117857\n",
      "[9900]\tvalid_0's l1: 0.117702\n",
      "--------------------- 第 2 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.206614\n",
      "[600]\tvalid_0's l1: 0.157663\n",
      "[900]\tvalid_0's l1: 0.146292\n",
      "[1200]\tvalid_0's l1: 0.140713\n",
      "[1500]\tvalid_0's l1: 0.136949\n",
      "[1800]\tvalid_0's l1: 0.134325\n",
      "[2100]\tvalid_0's l1: 0.1324\n",
      "[2400]\tvalid_0's l1: 0.13092\n",
      "[2700]\tvalid_0's l1: 0.129698\n",
      "[3000]\tvalid_0's l1: 0.128714\n",
      "[3300]\tvalid_0's l1: 0.127872\n",
      "[3600]\tvalid_0's l1: 0.12712\n",
      "[3900]\tvalid_0's l1: 0.126479\n",
      "[4200]\tvalid_0's l1: 0.125861\n",
      "[4500]\tvalid_0's l1: 0.125354\n",
      "[4800]\tvalid_0's l1: 0.12489\n",
      "[5100]\tvalid_0's l1: 0.124482\n",
      "[5400]\tvalid_0's l1: 0.124063\n",
      "[5700]\tvalid_0's l1: 0.123727\n",
      "[6000]\tvalid_0's l1: 0.123394\n",
      "[6300]\tvalid_0's l1: 0.123082\n",
      "[6600]\tvalid_0's l1: 0.122805\n",
      "[6900]\tvalid_0's l1: 0.122519\n",
      "[7200]\tvalid_0's l1: 0.122255\n",
      "[7500]\tvalid_0's l1: 0.122013\n",
      "[7800]\tvalid_0's l1: 0.12179\n",
      "[8100]\tvalid_0's l1: 0.121587\n",
      "[8400]\tvalid_0's l1: 0.121398\n",
      "[8700]\tvalid_0's l1: 0.121207\n",
      "[9000]\tvalid_0's l1: 0.121008\n",
      "[9300]\tvalid_0's l1: 0.120847\n",
      "[9600]\tvalid_0's l1: 0.120699\n",
      "[9900]\tvalid_0's l1: 0.120546\n",
      "--------------------- 第 3 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.206671\n",
      "[600]\tvalid_0's l1: 0.154746\n",
      "[900]\tvalid_0's l1: 0.143581\n",
      "[1200]\tvalid_0's l1: 0.138095\n",
      "[1500]\tvalid_0's l1: 0.134365\n",
      "[1800]\tvalid_0's l1: 0.13185\n",
      "[2100]\tvalid_0's l1: 0.130064\n",
      "[2400]\tvalid_0's l1: 0.12862\n",
      "[2700]\tvalid_0's l1: 0.127535\n",
      "[3000]\tvalid_0's l1: 0.126604\n",
      "[3300]\tvalid_0's l1: 0.125787\n",
      "[3600]\tvalid_0's l1: 0.125115\n",
      "[3900]\tvalid_0's l1: 0.124501\n",
      "[4200]\tvalid_0's l1: 0.123964\n",
      "[4500]\tvalid_0's l1: 0.123465\n",
      "[4800]\tvalid_0's l1: 0.122997\n",
      "[5100]\tvalid_0's l1: 0.122592\n",
      "[5400]\tvalid_0's l1: 0.122218\n",
      "[5700]\tvalid_0's l1: 0.121885\n",
      "[6000]\tvalid_0's l1: 0.121584\n",
      "[6300]\tvalid_0's l1: 0.121313\n",
      "[6600]\tvalid_0's l1: 0.121075\n",
      "[6900]\tvalid_0's l1: 0.120827\n",
      "[7200]\tvalid_0's l1: 0.1206\n",
      "[7500]\tvalid_0's l1: 0.120376\n",
      "[7800]\tvalid_0's l1: 0.120172\n",
      "[8100]\tvalid_0's l1: 0.119967\n",
      "[8400]\tvalid_0's l1: 0.119787\n",
      "[8700]\tvalid_0's l1: 0.119586\n",
      "[9000]\tvalid_0's l1: 0.11942\n",
      "[9300]\tvalid_0's l1: 0.119233\n",
      "[9600]\tvalid_0's l1: 0.119074\n",
      "[9900]\tvalid_0's l1: 0.118905\n",
      "--------------------- 第 4 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.206916\n",
      "[600]\tvalid_0's l1: 0.154395\n",
      "[900]\tvalid_0's l1: 0.143016\n",
      "[1200]\tvalid_0's l1: 0.137443\n",
      "[1500]\tvalid_0's l1: 0.13382\n",
      "[1800]\tvalid_0's l1: 0.131369\n",
      "[2100]\tvalid_0's l1: 0.129531\n",
      "[2400]\tvalid_0's l1: 0.128078\n",
      "[2700]\tvalid_0's l1: 0.126927\n",
      "[3000]\tvalid_0's l1: 0.125984\n",
      "[3300]\tvalid_0's l1: 0.125108\n",
      "[3600]\tvalid_0's l1: 0.124386\n",
      "[3900]\tvalid_0's l1: 0.123784\n",
      "[4200]\tvalid_0's l1: 0.123274\n",
      "[4500]\tvalid_0's l1: 0.122805\n",
      "[4800]\tvalid_0's l1: 0.122377\n",
      "[5100]\tvalid_0's l1: 0.12196\n",
      "[5400]\tvalid_0's l1: 0.121566\n",
      "[5700]\tvalid_0's l1: 0.121245\n",
      "[6000]\tvalid_0's l1: 0.120938\n",
      "[6300]\tvalid_0's l1: 0.120644\n",
      "[6600]\tvalid_0's l1: 0.120377\n",
      "[6900]\tvalid_0's l1: 0.120117\n",
      "[7200]\tvalid_0's l1: 0.119848\n",
      "[7500]\tvalid_0's l1: 0.119616\n",
      "[7800]\tvalid_0's l1: 0.119422\n",
      "[8100]\tvalid_0's l1: 0.119203\n",
      "[8400]\tvalid_0's l1: 0.119053\n",
      "[8700]\tvalid_0's l1: 0.118883\n",
      "[9000]\tvalid_0's l1: 0.11871\n",
      "[9300]\tvalid_0's l1: 0.118539\n",
      "[9600]\tvalid_0's l1: 0.118383\n",
      "[9900]\tvalid_0's l1: 0.118248\n",
      "--------------------- 第 5 折 ---------------------\n",
      "[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8\n",
      "[LightGBM] [Warning] lambda_l2 is set=2, reg_lambda=0.0 will be ignored. Current value: lambda_l2=2\n",
      "[LightGBM] [Warning] bagging_freq is set=1, subsample_freq=0 will be ignored. Current value: bagging_freq=1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "D:\\Anaconda\\Anaconda3\\lib\\site-packages\\lightgbm\\sklearn.py:736: UserWarning: 'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose' argument is deprecated and will be removed in a future release of LightGBM. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[300]\tvalid_0's l1: 0.207306\n",
      "[600]\tvalid_0's l1: 0.153765\n",
      "[900]\tvalid_0's l1: 0.142645\n",
      "[1200]\tvalid_0's l1: 0.137034\n",
      "[1500]\tvalid_0's l1: 0.133251\n",
      "[1800]\tvalid_0's l1: 0.130712\n",
      "[2100]\tvalid_0's l1: 0.128811\n",
      "[2400]\tvalid_0's l1: 0.127405\n",
      "[2700]\tvalid_0's l1: 0.126238\n",
      "[3000]\tvalid_0's l1: 0.125317\n",
      "[3300]\tvalid_0's l1: 0.124509\n",
      "[3600]\tvalid_0's l1: 0.123798\n",
      "[3900]\tvalid_0's l1: 0.123153\n",
      "[4200]\tvalid_0's l1: 0.122634\n",
      "[4500]\tvalid_0's l1: 0.122157\n",
      "[4800]\tvalid_0's l1: 0.121686\n",
      "[5100]\tvalid_0's l1: 0.121273\n",
      "[5400]\tvalid_0's l1: 0.12089\n",
      "[5700]\tvalid_0's l1: 0.120515\n",
      "[6000]\tvalid_0's l1: 0.120166\n",
      "[6300]\tvalid_0's l1: 0.119835\n",
      "[6600]\tvalid_0's l1: 0.119546\n",
      "[6900]\tvalid_0's l1: 0.119246\n",
      "[7200]\tvalid_0's l1: 0.118968\n",
      "[7500]\tvalid_0's l1: 0.118748\n",
      "[7800]\tvalid_0's l1: 0.118536\n",
      "[8100]\tvalid_0's l1: 0.118313\n",
      "[8400]\tvalid_0's l1: 0.118102\n",
      "[8700]\tvalid_0's l1: 0.117908\n",
      "[9000]\tvalid_0's l1: 0.117737\n",
      "[9300]\tvalid_0's l1: 0.117562\n",
      "[9600]\tvalid_0's l1: 0.117394\n",
      "[9900]\tvalid_0's l1: 0.117239\n",
      "交叉验证 MAE: 509.0537280387642\n"
     ]
    }
   ],
   "source": [
    "from lightgbm import early_stopping, log_evaluation\n",
    "\n",
    "# 1. Run the hyper-parameter search on a random 20% of the training data\n",
    "#    (test_size=0.8 sets the other 80% aside), presumably to limit search time.\n",
    "X_train_sub, _, y_train_sub, _ = train_test_split(X_train, y_train, test_size=0.8, random_state=17)\n",
    "\n",
    "# 2. Base LGBMRegressor. learning_rate, max_depth and feature_fraction are not\n",
    "#    set here because the parameter grid below supplies them.\n",
    "clf = LGBMRegressor(\n",
    "    n_estimators=10000,\n",
    "    boosting_type='gbdt',\n",
    "    objective='regression_l1',\n",
    "    num_leaves=31,\n",
    "    min_child_samples=20,\n",
    "    bagging_freq=1,\n",
    "    bagging_fraction=0.8,\n",
    "    lambda_l2=2,\n",
    "    random_state=17,\n",
    "    metric='mae'\n",
    ")\n",
    "\n",
    "# 3. Parameter grid: 3 x 2 x 2 = 12 candidates.\n",
    "param_grid = {\n",
    "    'learning_rate': [0.01, 0.05, 0.07],\n",
    "    'max_depth': [-1, 10],\n",
    "    'feature_fraction': [0.5, 0.8]\n",
    "}\n",
    "\n",
    "# 4. Exhaustive grid search with 5-fold CV on the subsample.\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=clf,\n",
    "    param_grid=param_grid,\n",
    "    scoring='neg_mean_absolute_error',  # negated MAE, so a higher score is better\n",
    "    cv=5,\n",
    "    verbose=1,\n",
    "    n_jobs=-1\n",
    ")\n",
    "grid_search.fit(X_train_sub, y_train_sub)\n",
    "\n",
    "# Report the winning parameter combination.\n",
    "print(\"最优参数：\", grid_search.best_params_)\n",
    "\n",
    "# 5. Estimator carrying the best parameters; it is refitted from scratch on\n",
    "#    every fold below.\n",
    "best_model = grid_search.best_estimator_\n",
    "\n",
    "# 6. 5-fold cross-validation on the full training set.\n",
    "oof = np.zeros(X_train.shape[0])\n",
    "sub = test[['SaleID']].copy()\n",
    "sub['price'] = 0.0  # float accumulator; each fold adds prediction / n_splits\n",
    "# Feature names come straight from X_train so this cell does not rely on a\n",
    "# `cols` variable left behind by another cell.\n",
    "feat_df = pd.DataFrame({'feat': list(X_train), 'imp': 0})\n",
    "skf = KFold(n_splits=5, shuffle=True, random_state=17)\n",
    "\n",
    "mae = 0\n",
    "for i, (trn_idx, val_idx) in enumerate(skf.split(X_train, y_train)):\n",
    "    print('--------------------- 第 {} 折 ---------------------'.format(i + 1))\n",
    "\n",
    "    # Split into this fold's training and validation parts.\n",
    "    trn_x, trn_y = X_train.iloc[trn_idx].reset_index(drop=True), y_train[trn_idx]\n",
    "    val_x, val_y = X_train.iloc[val_idx].reset_index(drop=True), y_train[val_idx]\n",
    "\n",
    "    # The early_stopping_rounds= / verbose= arguments of fit() are deprecated in\n",
    "    # LightGBM; the callbacks below are the documented replacement.\n",
    "    best_model.fit(\n",
    "        trn_x, trn_y,\n",
    "        eval_set=[(val_x, val_y)],\n",
    "        eval_metric='mae',\n",
    "        callbacks=[\n",
    "            early_stopping(stopping_rounds=300),  # stop after 300 rounds without improvement\n",
    "            log_evaluation(period=300),           # log the validation metric every 300 rounds\n",
    "        ]\n",
    "    )\n",
    "\n",
    "    # Accumulate fold-averaged test predictions and store out-of-fold predictions.\n",
    "    sub['price'] += np.expm1(best_model.predict(X_test)) / skf.n_splits\n",
    "    oof[val_idx] = best_model.predict(val_x)\n",
    "\n",
    "    # Per-fold MAE after mapping back with expm1, averaged over the folds.\n",
    "    mae += mean_absolute_error(np.expm1(val_y), np.expm1(oof[val_idx])) / skf.n_splits\n",
    "\n",
    "print('交叉验证 MAE:', mae)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the submission frame `sub` as a CSV file without the index column.\n",
    "sub.to_csv(path_or_buf='submit.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
